
###################################
## Estimating  Models
####################################
### WARNING - this part takes fairly long processing time

rm(list=ls(all=TRUE)) 
library(lavaan)

## ## load data
load("data/sample10K.RData")
names(d)
table(d$year)

## Grouping survey questions
questions <- paste("q", 1:50, sep = "")
f1 <- c(1, 2, 3, 4, 5, 10, 12, 13, 14, 17) # political liberalism 10
f2 <- c(21, 25, 27, 29, 30, 37, 40) # free market 
f3 <- c(32, 41, 42, 43, 46, 47, 48, 49, 50) # traditional values
f4 <- c(9, 11, 15, 16, 18, 19, 20) # nationalism  7
f5 <- c(24, 7, 8, 44, 45) # individual freedom    5
f6 <- c(23, 26, 28, 35, 39) # national interest
f7 <- c(22, 31, 33, 34, 36, 38) # social justice
FSet <- list(f1, f2, f3, f4, f5, f6, f7)

## ## a function that generates lavaan model syntax from a list of nubmers
genModel <- function(model.list) {
  model <- ""
  for (j in 1:length(model.list)) {
    model <- paste(model,
                   paste('factor',j,' =~ ',
                         paste(paste('q', model.list[[j]], sep=""),
                               collapse = " + "), sep = ""),
                   "\n", sep = "") 
  }
  return(model)
}

## ### Model A -- Political / Economic / Social:
modelA <- list(sort(unique(c(f1,f4,f5))),
               sort(unique(c(f2,f6,f7))),
               sort(unique(c(f3))))
fitA <- cfa(genModel(modelA), data=d[,questions], ordered = questions, std.lv = TRUE)
inspect(fitA,"cov.lv")

## ### Model B -- Political + Nationalism / Economic + Social:
modelB <- list(sort(unique(c(f1,f4,f5))),
               sort(unique(c(f2,f3,f6,f7))))
fitB <- cfa(genModel(modelB), data=d[,questions], ordered = questions, std.lv = TRUE)
inspect(fitB,"cov.lv")

## ### Model C -- One Dimensional Model:
modelC <- genModel(list(1:50))
fitC <- cfa(modelC, data=d[,questions], ordered = questions, std.lv = TRUE)

## ## Now we look at the meanings of the latent factors
summary(fitA)
parameterEstimates(fitA, ci = FALSE, standardize = TRUE)

## ## standardized estimates
lt <- lavPredict(fitA)
d$lt1 <- lt[,1]*-1     # political liberalism
d$lt2 <- lt[,2]*-1  # market ideas
d$lt3 <- lt[,3]*-1  # anti-traditionalism
coef <- parameterEstimates(fitA, ci = FALSE, standardize = TRUE)$std.all[1:49]
coef[(length(modelA[[1]])+1):49] <- coef[(length(modelA[[1]])+1):49] * (-1)
coef[1:22] <- coef[1:22] * (-1)

## ## factor1: 1-18 (18);
## ## factor2: 19-43 (25);
## ## factor3: 44-50 (7)

## ## PCA
pca.out<-prcomp(d[,questions], center=TRUE,scale=TRUE)
d$PC1 <- pca.out$x[,1] * -1 

save(d, coef, questions, f1, f2, f3, f4, f5, f6, f7, FSet, 
     genModel, modelA, fitA, fitB, fitC, pca.out, file = "output/result_cfa_dropQ6.RData")

###################################
## Bootstrapping (slow)
####################################

rm(list=ls(all=TRUE)) 
load("output/result_cfa_dropQ6.RData")
questions <- paste("q", 1:50, sep = "")
data <- d[, questions]

library(doParallel)
library(foreach)
set.seed(02139)
cl<-makeCluster(20) 
registerDoParallel(cl)
sims <- 200
ndim <- 3
nobs <- dim(d)[1]
mod1 <- genModel(modelA)
output <- foreach (i = 1:sims, .combine = c,
                   .inorder = FALSE,
                   .packages = c("lavaan")) %dopar% { 
                     fit <- cfa(mod1, data=data[sample(1:nobs, nobs, replace = TRUE),],
                                std.lv = TRUE, ordered = questions)
                     para <- parameterEstimates(fit, ci = FALSE,
                                                standardize = TRUE)$std.all[1:49]  ##change to 49
                     lt <- lavPredict(fit, newdata = data)
                     return(list(para, lt))
                   }
stopCluster(cl)
## flip signs
result.para <- matrix(NA, 49, sims)
result.lv <- array(NA, dim = c(nobs, ndim, sims))
for (i in 1:sims) {
  result.para[, i] <- output[[(i-1)*2+1]]
  result.lv[, , i] <- output[[i*2]] 
}
result.para[23:49,] <- result.para[23:49,] * (-1)
result.lv[, 2:3, ] <- result.lv[, 2:3, ] * (-1)
save(output, result.para, result.lv, file = "output/result_cfa_boot_dropQ6.RData")

###############################
## Figure 6-8: CFA Estimates ##
###############################
rm(list=ls(all=TRUE)) 
load("output/result_cfa_dropQ6.RData")
load("output/result_cfa_boot_dropQ6.RData")

a1 <- length(modelA[[1]])
a2 <- length(modelA[[2]])
a3 <- length(modelA[[3]])
qorder <- unlist(modelA)
ft <- rep(NA, 49) # original grouping
for (i in 1:50) {
  for (j in 1:7) {
    if (qorder[i]%in%FSet[[j]]) {
      ft[i] <- j
    }
  } 
}

## Estimates
est.out <- cbind.data.frame("No." = qorder,
                            "L.V." = c(rep(1,a1),rep(2,a2),rep(3,a3)), 
                            "factor" = ft,
                            "Est." = coef,
                            "S.E." = apply(result.para, 1, sd),
                            "CI_lower" = apply(result.para, 1, quantile, 0.025),
                            "CI_upper" = apply(result.para, 1, quantile, 0.975))
est.out2 <- matrix(NA, max(a1,a2,a3),11)
est.out2[1:a1,1:3] <- as.matrix(est.out[1:a1,c(1,4,5)])
est.out2[1:a2,5:7] <- as.matrix(est.out[(a1+1):(a1+a2),c(1,4,5)])
est.out2[1:a3,9:11] <- as.matrix(est.out[(a1+a2+1):49,c(1,4,5)])
colnames(est.out2) <- rep(c("No.","Coef.","S.E.",""),3)[-12]



## Figures 6-8
qcontent2 <- c(
  "People should not have universal suffrage if they have not been educated about democracy.",
  "Universality of human rights take precedence over sovereignty.",
  "When events that have major repercussions for the safety and security occur, the government should\n freely disseminate information even if information disclosure increases the risks of unrest.",
  "Western multiparty systems are unsuitable for China in its current state.",
  "Indiscriminately imitating (systems of) western-style freedom of speech will lead to\n social disorder in China.",
  "It is preferable to let universities recruit students by themselves than to have a unified national\n college entrance examination system.",
  "Religoius adherents should be allowed to conduct missionary  work in\n nonreligious spaces.",
  "Primary school, secondary school, and college students should all participate in government\n organized military training.",
  "National unity and territorial integrity are the highest interest of society.",
  "Even if procedural rules are violated in the process of investigation and evidence gathering, those\n who have actually committed crimes should be punished.",
  "The state has an obligation to provide foreign aid.",
  "It is acceptable to besmirch the images of national leaders and founding leaders in literary and\n artistic works.",
  "When laws fail to fully constrain criminal behaviors, people have the right to impose their own\n punishments for these behaviors.",
  "Media should be allowed to represent the voice of a particular social stratum or interest group.",
  "If it has sufficient state capabilities, China has the right to take any action to defend its\n national interests.",
  "Force should be used to reunify Taiwan with China if conditions permit.",
  "Lawyers should do their utmost to defend clients even if the client has committed a crime.",
  "Chinese citizens should be allowed to hold foreign citizenship.",
  "It is impossible for western countries led by the United States to tolerate the rise of China\n into a major power.",
  "The state should take measures to train and support athletes so they can win glory for the country in\n various international competitions.",
  "The minimum wage should be set by the state.",
  "The fruits of China's economic development since reform and opening up are enjoyed by a small\n group of people; most people have not received much benefit.",
  "In the decision-making of major (infrastructure) projects, individual interests should give way\n to social interests.",
  "Wasting food is an individual freedom.",
  "If the price of pork is too high, the government should intervene.",
  "A high tariff should be imposed on imported goods that are also produced domestically to\n protect domestic industries.",
  "Education should be public to the greatest extent.",
  "The interests of state-owned enterprises are part of the national interest.",
  "Attempting to control real estate prices will undermine economic development.",
  "The primary means to improve the lives of the low-income people is to give them fiscal\n subsidies and support.",
  "A rich person deserves better medical services.",
  "High income earners should disclose the sources of their income.",
  "People who make money through gains from financial investments contribute less to then society\n than people make money through labor.",
  "It is better to sell state-owned enterprises to capitalists than to let them go bankrupt.",
  "Sectors related to national security and important to the national economy and people's\n livelihoods must be controlled by state-owned enterprises.",
  "The process of capital accumulation is always accompanied by harm to the working class.",
  "Individuals should be able to own, buy and sell land.",
  "The government should adopt higher grain purchasing prices to boost the income of peasants.",
  "Foreign capital in China should enjoy the same treatment as national capital.",
  "Natural monopolies that emerge out of market competitions are harmless.",
  "Two adults should be free to engage in voluntary sexual behavior regardless of their marital status.",
  "One should not openly comment on the shortcomings of their elders.",
  "The modern Chinese society needs Confucianism.",
  "The fundamental standard to evaluate the value of a work of art is whether it is liked by the masses.",
  "Even with population pressures, the state and the society have no right to interfere in the decision\n to have a child, or how many children to have.",
  "The Eight Diagrams (Bagua) in The Book of Changes (Zhouyi) can explain many things well.",
  "The perspective of traditional Chinese medicine on human health is superior to that of modern\n mainstream medical science.",
  "It is unnecessary to push forward the simplification of Chinese characters.",
  "Traditional Chinese classics should be the basic education material for children.",
  "I will recognize the relationship between my child and a same-sex partner if it is a voluntary choice." 
)


## plotting
ft1 <- est.out[1:a1, ]
ft2 <- est.out[(a1+1):(a1+a2),]
ft3 <- est.out[(a1+a2+1):49,]
draw <- rbind(ft1[order(ft1[,4], decreasing = TRUE),],
              ft2[order(ft2[,4], decreasing = TRUE),],
              ft3[order(ft3[,4], decreasing = TRUE),])

pdf("graphs/cfa_est_factor1_dropQ6.pdf",height = 9, width = 12)
par(mar = c(8,39,1,1), las = 1)
num <- 1:a1
plot(1, type = "n", xlim = c(-0.8, 0.8), ylim = c(a1,1),
     xlab = "", ylab = "", axes = FALSE, cex.axis = 1.5)
abline(h = 0:51, col = "gray90", cex = 0.8)
abline(v = seq(-0.8,0.8,0.1), col = "gray90", cex = 0.8)
abline(v = 0, col = "gray90", lwd = 5)
box(); axis(1, at = seq(-0.8, 0.8, 0.4))
for (i in num) {
  points(draw[i, 4], i,  pch =16, col = 1, cex = 1.2)
  lines(draw[i,6:7], c(i,i), col = 1, lwd = 4)
}
axis(2, at = 1:length(num), labels = qcontent2[draw[num,1]], cex.axis = 1)
mtext("Coefficient",1,2.5,cex = 1.5)
text(0.2, 1.5, "Latent Factor 1", cex = 2)
title(main = "CFA: Our Model", line = -41.5, cex.main = 1.5, adj = 1)
graphics.off()

pdf("graphs/cfa_est_factor2_dropQ6.pdf",height = 12, width = 12)
par(mar = c(8,39,1,1), las = 1)
num <- (a1+1):(a1+a2)
plot(1, type = "n", xlim = c(-0.8, 0.8), ylim = c((a1+a2), (a1+1)),
     xlab = "", ylab = "", axes = FALSE, cex.axis = 1.5)
abline(h = 0:51, col = "gray90", cex = 0.8)
abline(v = seq(-0.8,0.8,0.1), col = "gray90", cex = 0.8)
abline(v = 0, col = "gray90", lwd = 5)
box(); axis(1, at = seq(-0.8, 0.8, 0.4))
for (i in num) { 
  points(draw[i, 4], i, col = 1, pch = 16, cex = 1.2)
  lines(draw[i,6:7], c(i,i), col = 1, pch = 16, lwd = 3)
}
axis(2, at = num, labels = qcontent2[draw[num,1]], cex.axis = 1)
mtext("Coefficient",1,2.5,cex = 1.5)
text(0.2, 41.5, "Latent Factor 2", cex = 2)
title(main = "CFA: Our Model", line = -57, cex.main = 1.5, adj = 1)
graphics.off()

pdf("graphs/cfa_est_factor3_dropQ6.pdf",height = 4, width = 12)
par(mar = c(8,39,1,1), las = 1)
num <- (a1+a2+1):50
plot(1, type = "n", xlim = c(-0.8, 0.8), ylim = c(50.5, (a1+a2+1)),
     xlab = "", ylab = "", axes = FALSE, cex.axis = 1.5)
abline(h = 0:51, col = "gray90", cex = 0.8)
abline(v = seq(-0.8,0.8,0.1), col = "gray90", cex = 0.8)
abline(v = 0, col = "gray90", lwd = 5)
box(); axis(1, at = seq(-0.8, 0.8, 0.4))
for (i in num) { 
  points(draw[i, 4], i, col = 1, pch = 16, cex = 1.2)
  lines(draw[i,6:7], c(i,i), col = 1, pch = 16, lwd = 3)
}
axis(2, at = num, labels = qcontent2[draw[num,1]], cex.axis = 1)
mtext("Coefficient",1,2.5,cex = 1.5)
text(0.2, 49.5, "Latent Factor 3", cex = 2)
title(main = "CFA: Our Model", line = -17.5, cex.main = 1.5, adj = 1)
graphics.off()


##################################################
## Figure 9: correlations between latent traits ##
##################################################

rm(list=ls(all=TRUE)) 
load("output/result_cfa_dropQ6.RData")

library(ggplot2)
library(GGally)
p <- ggpairs(d[,c("lt1","lt2","lt3","PC1")],
             columnLabels = c("Political Liberalism/Non-Nationalism",
                              "Pro-Market Values",
                              "Traditional Values * (-1)","1st Principal Component"),
             lower = list(continuous = wrap("points", alpha = 0.3, size=0.2),
                          combo = wrap("dot", alpha = 0.4, size=0.2)),
             upper = list(continuous = wrap("cor", size = 8, colour = 1)))  +
  theme(axis.text = element_text(size = 14, vjust = 1, color = "black"),
        axis.title =element_text(size=18, face="bold")) 
pdf("graphs/cfa_latents_dropQ6.pdf", height= 9, width = 9)
print(p, left = 0.2, bottom = 0.2)
graphics.off()

###################################
## Individual-level Correlations ##
###################################

rm(list=ls(all=TRUE)) 
load("output/result_cfa_dropQ6.RData")
load("output/result_cfa_boot_dropQ6.RData")

## ## income
d$inc<-d$income
d$inc[which(d$inc%in%c(1,2))]<-1.5 # 0-25K, 25-50K --> 0-50K
d$inc[which(d$inc%in%c(3,4,5))]<-3.5 # 50-75K, 75-100K, 100-150K --> 50-150K
(out <- aggregate(lt1 ~ inc, data = d, mean))

## ## education
d[which(d$educ==1),]$educ <- 2
(out <- aggregate(lt1 ~ educ, data = d[which(d$age>=40 & d$female ==0),], mean))

## ## stacking bootstrapped estimates together
nobs <- dim(result.lv)[1]
dim3 <- dim(result.lv)[3]
ndim <- 3
s <- matrix(NA, nobs * dim3, ndim)
for (i in 1:nobs) {     
  s[((i-1)*dim3+1):(i*dim3),] <- t(result.lv[i,,]) 
}
s <- data.frame(s)
colnames(s) <- c("lt1","lt2","lt3")

## ## bootstrap function
mean.boot <- function(X, nsims=100) {
  ##     ## variable names
  ##     ## X  grouping variable
  ##     ## nsims  number of simulations
  nX<-unique(d[,X]) # unique levels
  
  ##     ## create IDs
  id.obs <-1:nobs
  id.dim3 <- 1:dim3
  ndim <- 3
  
  ##     ## bootstrap
  results<-array(NA,dim = c(length(nX),ndim,nsims))
  for (i in 1:nsims) {
    id.smp <- sample(1:nobs, nobs, replace = TRUE) # which unit 
    ss <- d[id.smp, ]
    results[,,i]<-do.call("rbind",by(ss,ss[,X],function(mat) apply(mat[,c("lt1","lt2","lt3")],2,mean)))
    if (i%%10==0) cat(i) else cat(".")
  }
  
  ##     ## summary
  output<-array(NA,dim = c(length(nX), ndim,4)) ## mean, se, CI_l, CI_u 
  output[,,1]<-do.call("rbind",by(d,d[,X],function(mat) apply(mat[,c("lt1","lt2","lt3")],2,mean)))
  output[,,2]<-apply(results,c(1,2),sd)
  output[,,3]<-apply(results,c(1,2), quantile,c(0.025))
  output[,,4]<-apply(results,c(1,2),quantile,c(0.975))
  return(output)
}

## ## bootstrap
nsims <- 1000 
ses.female <- mean.boot(X="female", nsims = nsims)
ses.edu <- mean.boot(X="educ", nsims= nsims)
ses.income<-mean.boot(X="inc", nsims= nsims)
ses.age <- mean.boot(X="age", nsims= nsims)
save(ses.female, ses.edu, ses.income, ses.age,
     file="output/result_cfa_ind_dropQ6.RData")

###############################
# Figures 10, 11. Correlations
###############################

rm(list=ls(all=TRUE)) 
load("output/result_cfa_ind_dropQ6.RData")

mycol = c(1, "gray50", "gray80")
mypch = c(16, 17, 15)

## Education
pdf("graphs/ses_educ_dropQ6.pdf")
output <- ses.edu
offset <- seq(-0.15,0.15,length.out=3)
par(mar=c(7.5,4.5,2,2))
plot(1, type="n",ylab="", xlab="Education", ylim=c(-0.6,0.6), xlim=c(0.5,3.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=1:3, labels=c("Below\nCollege","College\n","Above\nCollege"),
     cex.lab = 1.5, cex.axis=1.2, tick=FALSE,line=1)
axis(1,at=1:3, labels = NA, tick = TRUE)
for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
for(i in seq(-0.5,3.5,by=0.25)) abline(v=i,col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1:3) { # four latent variables
  for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,3],output[i,k,4]),lwd=4, col = mycol[k])}
  points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism/Non-Nationalism",
                             "Pro-Market Values",
                             "Traditional Values * (-1)"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
title(main = "CFA: Our Model", line = -31.5)
graphics.off()


## Income
pdf("graphs/ses_income_dropQ6.pdf")
output <- ses.income
offset <- seq(-0.15,0.15,length.out=3)
par(mar=c(7.5,4.5,2,2))
plot(1, type="n",ylab="", xlab="Annual Income", ylim=c(-0.6,0.8), xlim=c(0.5,4.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=1:4, labels=c("0-50K","50-150K","150-300K",">300K"), cex.axis=1.5,tick=F,line=0)
axis(1,at=1:4, labels = NA, tick = TRUE)
for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
for(i in seq(-0.5,4.5,by=0.25)) abline(v=i,col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1:3) { # three latent variables
  for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,3],output[i,k,4]),lwd=4, col = mycol[k])}
  points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism/Non-Nationalism",
                             "Pro-Market Values",
                             "Traditional Values * (-1)"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
title(main = "CFA: Our Model", line = -31.5)
graphics.off()



## Age
pdf("graphs/ses_age_dropQ6.pdf",width = 20)
output <- ses.age
legends = c("Political Liberalism/Non-Nationalism","Pro-Market Values","Traditional Values * (-1)")
par(mar=c(13,4.5,2,2), mfcol = c(1,3))
for (k in 1:3) {
  plot(1, type="n",ylab="", xlab="Age", ylim=c(-0.6,0.6), xlim=c(-1,45),
       cex.lab=2.5, cex.axis=2, xaxt="n")
  abline(h=0)
  axis(1,at=seq(1,43, by = 3), labels=seq(18, 60, by =3),
       cex.axis= 2,tick=TRUE,line=0)
  for(i in seq(-2,2,by=0.05)) abline(h=i,col="#AAAAAA50",lwd=1)
  for(i in seq(-2,45,by=3)) abline(v=i,col="#AAAAAA50",lwd=1)
  n<-dim(output)[1]
  for (i in 1:n) {lines(c(i,i),c(output[i,k,3],output[i,k,4]),
                        lwd= 4, col = 1)}
  points(c(1:n),output[,k,1],pch=mypch[k],cex=2, col = 1)
  title(legends[k], line = -3, cex.main = 2.5)
}
title(main = "CFA: Our Model", line = -46.5, cex.main =3, adj = 1)
graphics.off()
